/*
Content: Preparation of main dataset
Paper: "Immigrants Return Intentions and Labor Market Behavior when the Home Country is Unsafe"
Authors: Jacopo Bassetto, Teresa Freitas-Monteiro
*/


		
************************************************************
**# LOAD RAW DATA FROM GSOEP ********************
************************************************************
* Open the raw SOEP extract and discard rows that have no person id or no
* country of origin.
use "${final}/rawdata_soep.dta", clear
drop if pid == . | corigin == .
rename pmonin imonth

* Interview date
* The two variables used below as the day argument of mdy() are set to 15
* wherever they are missing.
foreach dayvar in hlk0059 ptagin {
	replace `dayvar' = 15 if `dayvar' == .
}

* Candidate dates built from different month/day sources. startdate is the
* preferred one; startdate2-startdate5 are fallbacks, tried in that order.
gen startdate  = mdy(imonth,  hlk0059, syear)
gen startdate2 = mdy(imonth,  ptagin,  syear)
gen startdate3 = mdy(hlk0060, hlk0059, syear)
gen startdate4 = mdy(pgmonth, hlk0059, syear)
gen startdate5 = mdy(pgmonth, ptagin,  syear)

forvalues k = 2/5 {
	replace startdate = startdate`k' if startdate == .
}

* Monthly date from survey year and interview month
gen idate = ym(syear, imonth)

* Rows for which no candidate produced a valid date cannot be matched to events
drop if startdate == .

* Country of origin
* Work on a clone of corigin so the original variable keeps its coding.
cap drop soep_corigin_id
clonevar soep_corigin_id = corigin

	* Harmonise a handful of origin codes in soep_corigin_id (old = new).
	* Country names are the ones noted by the authors next to each code:
	*   128 -> 104  (Malaysia)
	*   155 -> 91   (Turkmenistan)
	*     7 -> 1    (Germany)
	*   164 -> 18   (USA)
	*   168 -> 106  (Montenegro)
	* Authors' note: 145 Mongolia and 129 Samoa are missing (no rule applied).
	* No new code is also an old code, so a single recode gives the same
	* result as applying the rules one after another.
	recode soep_corigin_id (128 = 104) (155 = 91) (7 = 1) (164 = 18) (168 = 106)
	
* Reduce dataset to key variables
* Remove whole families of variables by wildcard. Every pattern must match at
* least one variable, otherwise -drop- stops with an error.
drop pg* pag* pab* pka* pdaue* pinta*                                 ///
     pla* plb* plc* pld* ple* plf* plg* plh* pli* plj* plk* plm*      ///
     p_k* p_n* p_buh* p_is* p_a* p_f* p_d*                            ///
     hlc* hld* hle* hlf* hlh* hli* hlj* hlk* hac*                     ///
     bire* bii* living* ic*                                           ///
     f*8 m*8 f*92 m*92
	
*******************************************************
***  MERGE SOEP WITH MONTHLY EVENT DATA *************
******************************************************

* Matching date - month-year of interview with month-year of event.
* startmy is the monthly date of the interview and, together with
* soep_corigin_id, the key into the monthly event file.
g year    = year(startdate)
g month   = month(startdate)
g startmy = ym(year, month)
format startmy %tm

drop if syear==. // ATTENTION: drop when no survey data
* year/month are removed before the merge and rebuilt from startdate afterwards
drop year month
cap drop obs

* Forward slashes throughout: they work on every platform Stata runs on and
* match the other references to this folder in this file.
fmerge m:1 soep_corigin_id startmy using "$globalterror/final/terror_monthlydata_soep_final.dta", gen(eventmerge_m)
* Discard country-months that exist only in the event file (no interview)
drop if eventmerge_m==2
cap drop obs

* Drop each helper on its own: a single -capture drop- with several names
* removes nothing when any one of them is absent, which would make the
* -g startmy- below fail.
foreach v in year month startmy {
	cap drop `v'
}

g year    = year(startdate)
g month   = month(startdate)
g startmy = ym(year, month)
format startmy %tm


******************************************
**** INTERVIEWED PRE-POST EVENT ******
******************************************

* Pre-post event
* For every window (30/60/90), event type and lag definition, build
* post<event>_<lag>_p<window> from the matching distance variable:
*   0 when distance <= 0, 1 when distance >= 1 and distance != ., missing otherwise.
foreach win in 30 60 90 {
	foreach ev in terror armedconflict {
		foreach lag in m5y m4y m3y {
			local sfx `ev'_`lag'_p`win'
			cap drop post`sfx'
			g post`sfx' = cond(distance`sfx' <= 0, 0, cond(distance`sfx' >= 1 & distance`sfx' != ., 1, .))
		}
	}
}


* Value and variable labels for the 90-window terror indicators only
foreach lag in m5y m4y m3y {
	local pv postterror_`lag'_p90
	label def `pv' 0 "Pre-Terror" 1 "Post-Terror", replace
	label val `pv' `pv'
	label var `pv' "Terror"
}


					
*****************************************************************************
** Keep only events where there are people in the control and treatment group
*****************************************************************************

* Done via the share of interviews on each side of the event: a share of 1
* means all interviews for that event fall in the pre period, or all in the
* post period.
* Note: _N is the size of the whole by-group; the -if- qualifier only decides
* which rows receive the value, it does not change what is counted.

foreach lag in m5y m4y m3y {
	foreach win in 90 60 30 {
		local sfx `lag'_p`win'

		* start from a clean slate
		foreach stub in prepostinter totinter ratiointer mratiointer {
			cap drop `stub'terror_`sfx'
		}

		* rows per country x event x pre/post side
		qui bys soep_corigin_id postterror_`sfx' relevantterror_`sfx': ///
			gen prepostinterterror_`sfx' = _N if distanceterror_`sfx' != .

		* rows per country x event
		qui bys soep_corigin_id relevantterror_`sfx': ///
			gen totinterterror_`sfx' = _N if distanceterror_`sfx' != .

		* share of the event's rows that lie on this row's side
		qui g ratiointerterror_`sfx' = prepostinterterror_`sfx' / totinterterror_`sfx' if distanceterror_`sfx' != .

		* largest side share within the country x event; equals 1 when one side is empty
		qui bys soep_corigin_id relevantterror_`sfx': ///
			egen mratiointerterror_`sfx' = max(ratiointerterror_`sfx') if ratiointerterror_`sfx' != .

		* the two counts were only intermediate steps
		cap drop prepostinterterror_`sfx' totinterterror_`sfx'
	}
}

 
***************************
*** SAMPLE  ***
***************************
**** ATTENTION: match sample time period to IEB
* Keep survey years 2000-2018 (rows with missing syear are removed as well)
* and exclude origin id 1 (Germans).
keep if inrange(syear, 2000, 2018) & soep_corigin_id != 1


*** Keep sample with and without controls constant
* One numeric cluster id per value of relevantterror_<lag>_p<window>
foreach lag in m5y m4y m3y {
	foreach win in 90 60 30 {
		egen clusterterror_`lag'_p`win' = group(relevantterror_`lag'_p`win')
	}
}



eststo clear
local i = 1

* Absorbed fixed effects; identical for every lag definition
global controls "i.soep_corigin_id i.soep_corigin_id#i.syear i.month#i.syear i.bula"

* For each lag definition, run the fully controlled regression once and store
* its estimation sample as sample_terror_<lag>.
foreach lag in m5y m4y m3y {

	* Selection: origin id not missing and not 1, distance within [-3,3] and
	* different from 0, and interviews present on both sides of the event.
	* NOTE(review): the last condition always uses mratiointerterror_m3y_p90,
	* also for the m5y and m4y runs - confirm this is intended.
	global select_reg_M "!inlist(soep_corigin_id,.,1) & inrange(distanceterror_`lag'_p90,-3,3) & distanceterror_`lag'_p90!= 0 &  mratiointerterror_m3y_p90!=1"

	quietly reghdfe remain_ger_per postterror_`lag'_p90 i.sex c.age c.ysm##c.ysm i.maritstat i.educ_aftm2 i.child ///
		if $select_reg_M, vce(cluster clusterterror_`lag'_p90) absorb($controls)

	cap drop sample_terror_`lag'
	quietly gen sample_terror_`lag' = e(sample)
}



********** COMPUTE NUMBER OF RELEVANT AND ISOLATED EVENTS STATS ********** 

* Builds a country x event-month list of the events that enter the m3y/p90
* estimation sample and saves it. Everything between preserve and restore
* works on a throw-away copy of the data; the globals set here persist.
preserve
	eststo clear
	local i = 1

	* Same selection as for the sample flags, except that distance 0 is not excluded
	global select_reg_M "!inlist(soep_corigin_id,.,1) & inrange(distanceterror_m3y_p90,-3,3)  &  mratiointerterror_m3y_p90!=1"
	global controls "i.soep_corigin_id i.soep_corigin_id#i.syear i.month#i.syear i.bula"

	reghdfe remain_ger_per postterror_m3y_p90 i.sex c.age c.ysm##c.ysm i.maritstat i.educ_aftm2 i.child ///
		if $select_reg_M , vce(cluster clusterterror_m3y_p90) absorb($controls)
	cap drop sample_terror_m3y
	g sample_terror_m3y = e(sample)

	* Month of the event = interview month minus the signed distance.
	* Only distances -3,...,3 are mapped; all other rows get a missing date.
	g date_match = startmy - distanceterror_m3y_p90 if inlist(distanceterror_m3y_p90, -3, -2, -1, 0, 1, 2, 3)
	format date_match %tm
	sort soep_corigin_id startmy

	* From here on startmy holds the event month rather than the interview month
	drop startmy
	rename date_match startmy

	* One row per country x event month among the estimation sample
	keep if sample_terror_m3y == 1
	g variable = 1
	collapse (sum) variable, by(soep_corigin_id startmy)
	g eventusedsoep = 1
	drop variable
	save "$globalterror/final/eventsusedinsoep_final.dta", replace
restore

* Save the person-level working file (unaffected by the preserve/restore block)
save "$final/finaldata_terrorsoep_light_final.dta", replace
					
					
					
					
********************************************************************************
**# ADD POLITICAL STABILITY INDICATORS
********************************************************************************

use "$final/finaldata_terrorsoep_light_final.dta", clear

** Get country labels lost at some point
* Build a one-row-per-corigin list from the raw SOEP file and merge it back on
* corigin.
* NOTE(review): the list carries corigin only; whether this restores the value
* labels as intended cannot be verified from this file.
preserve
	use "$final/rawdata_soep.dta", clear
	keep corigin
	drop if corigin==.
	bysort corigin: keep if _n==1
	tempfile origin
	save `origin'
restore

* Fix: this was "cap drop drop _merge", which fails silently unless a variable
* called "drop" exists and therefore leaves an old _merge in place; the merge
* below then aborts because _merge is already defined.
cap drop _merge

* keep(1 3): never append origin codes that appear only in the raw file.
* Without it, each such code adds a row that is empty except for corigin.
merge m:1 corigin using `origin', keep(1 3)

**************************************
*** PAST TERRROR MEANS *******
**************************************
* Computes, in the monthly event file, lagged values of terror_ya per country,
* assigns the value observed at distance 0 to every row of the same event, and
* merges the result onto the person-level data by country and month.
preserve
	use "$globalterror/final/terror_monthlydata_soep_final.dta" , clear

	foreach var in terror {
		* Value of `var'_ya 12 rows earlier within country, and the average of
		* the values 12, 24 and 36 rows earlier.
		* NOTE(review): [_n-k] is a row offset. It equals a k-month lag only if
		* each country has exactly one row per month without gaps - confirm.
		bys soep_corigin_id (startmy): gen mean_`var'_m1y = `var'_ya[_n-12]
		bys soep_corigin_id (startmy): gen mean_`var'_m3y = (`var'_ya[_n-12] + `var'_ya[_n-24] + `var'_ya[_n-36])/3
	}

	foreach var in m3y m1y {
		* lagged mean in the event month itself (distance 0) ...
		g temp2 = mean_terror_`var' if distanceterror_m3y_p90==0
		* ... copied to every row sharing the event identifier (treated and control)
		bysort identterror_m3y_p90: egen eventmean_terror_`var' = max(temp2)
		drop temp2
	}

	* Restore country-month order (full variable name instead of the
	* abbreviation soep_c, which breaks as soon as it stops being unique)
	sort soep_corigin_id startmy

	compress

	tempfile datsetmeans
	save `datsetmeans'
restore


* Bring only the event-level means into the person-level data; keep(1 3)
* leaves the set of person-level rows unchanged.
cap drop _merge
merge m:1 soep_corigin_id startmy using `datsetmeans', keepusing(eventmean_terror_*) keep(1 3)
drop _merge

* Rounded copies of the event-level means, stored as mean_terror_<lag>_r.
* Both the intermediate name used previously (eventmean_terror_<lag>_r) and
* the final name are cleared first so the section can be re-run on a file
* that already contains them.
foreach lag in m1y m3y {
	cap drop eventmean_terror_`lag'_r
	cap drop mean_terror_`lag'_r
	g mean_terror_`lag'_r = round(eventmean_terror_`lag')
}

* Three-level scale of the event-level mean:
*   3 = exactly 0,  2 = above 0 up to 12,  1 = above 12;  missing stays missing.
* NOTE(review): value label event_scale_v1 is attached but not defined in this
* file - confirm it is defined elsewhere.
local src_event_scale_nr_v1     eventmean_terror_m3y
local src_event_scale_nr_m1y_v1 eventmean_terror_m1y

foreach scale in event_scale_nr_v1 event_scale_nr_m1y_v1 {
	local src `src_`scale''
	* cleared for both scales (previously only for the m1y one)
	cap drop `scale'
	g `scale' = 2 if inrange(`src', 0, 12)
	replace `scale' = 3 if `src' == 0
	replace `scale' = 1 if `src' > 12 & `src' != .
	label val `scale' event_scale_v1
}

	


********************************** 
*** Political Stability Index *** 
********************************** 
* Merges country-year political stability ranks and derives categorical
* versions (3 and 4 groups) from the raw rank, its m1y/m3y/m4y/m5y means, and
* the rounded versions of each.
*
* Fixes relative to the previous version:
*  - missing ranks no longer fall into the top category (in Stata a missing
*    value compares as larger than any number, so ">= 75" is true for it);
*  - the unrounded 4-group variables are no longer dropped while building the
*    rounded ones;
*  - helper variables are cleared one by one so the section can be re-run;
*  - forward slashes in the file path.

cap drop _merge
merge m:1 corigin syear using "$path/Daten/extradatasets/politicalstability_short.dta", keep(1 3)

* Clear every existing 3-group variable (base, _m*, _r). A single pattern is
* used because -capture drop- with several names drops nothing if one is absent.
cap drop rank_25_50_75*

* Rounded ranks
foreach x in 1 3 4 5 {
	cap drop mean_PolStab_rank_m`x'y_r*
	g mean_PolStab_rank_m`x'y_r = round(mean_PolStab_rank_m`x'y)
}

cap drop PolStab_rank_r
g PolStab_rank_r = round(PolStab_rank)

* --- Unrounded ranks ---------------------------------------------------------
* 3 groups: 1 = rank <= 25, 3 = rank >= 75, 2 = strictly between; missing stays missing
g rank_25_50_75 = cond(PolStab_rank <= 25, 1, cond(PolStab_rank >= 75, 3, 2)) if !missing(PolStab_rank)

foreach x in 1 3 4 5 {
	local v mean_PolStab_rank_m`x'y
	g rank_25_50_75_m`x'y = cond(`v' <= 25, 1, cond(`v' >= 75, 3, 2)) if !missing(`v')
}

* 4 groups: 1 = <= 25, 2 = (25,50], 3 = (50,75], 4 = > 75; missing stays missing
foreach x in 1 3 4 5 {
	local v mean_PolStab_rank_m`x'y
	cap drop rank2_25_50_75_m`x'y
	g rank2_25_50_75_m`x'y = cond(`v' <= 25, 1, cond(`v' <= 50, 2, cond(`v' <= 75, 3, 4))) if !missing(`v')
}

* --- Rounded ranks -----------------------------------------------------------
g rank_25_50_75_r = cond(PolStab_rank_r <= 25, 1, cond(PolStab_rank_r >= 75, 3, 2)) if !missing(PolStab_rank_r)

foreach x in 1 3 4 5 {
	local v mean_PolStab_rank_m`x'y_r
	g rank_25_50_75_m`x'y_r = cond(`v' <= 25, 1, cond(`v' >= 75, 3, 2)) if !missing(`v')
}

foreach x in 1 3 4 5 {
	local v mean_PolStab_rank_m`x'y_r
	* drop the rounded variable itself, not its unrounded counterpart
	cap drop rank2_25_50_75_m`x'y_r
	g rank2_25_50_75_m`x'y_r = cond(`v' <= 25, 1, cond(`v' <= 50, 2, cond(`v' <= 75, 3, 4))) if !missing(`v')
}

* --- Value labels ------------------------------------------------------------
label def rank_25_50_75 1 "Pol. Stab., <=25" 2 "Pol. Stab., 25-75" 3 "Pol. Stab., >=75", replace
label def rank2_25_50_75 1 "Pol. Stab., <=25" 2 "Pol. Stab., 25-50" 3 "Pol. Stab., 50-75" 4 "Pol. Stab., >75", replace

label val rank_25_50_75   rank_25_50_75
label val rank_25_50_75_r rank_25_50_75

* All four lag definitions get labels (previously only m1y and m3y did)
foreach x in 1 3 4 5 {
	label val rank_25_50_75_m`x'y    rank_25_50_75
	label val rank_25_50_75_m`x'y_r  rank_25_50_75
	label val rank2_25_50_75_m`x'y   rank2_25_50_75
	label val rank2_25_50_75_m`x'y_r rank2_25_50_75
}


save "$final/finaldata_terrorsoep_light_final.dta", replace




 